from selenium import webdriver
import requests
from bs4 import BeautifulSoup
from lxml import etree
import re
import random
import time
# Module-level counters: ``n`` is the 1-based number of the input line being
# processed by the driver loop at the bottom of the file.
i, n = 1, 0

# Disabled snippet that dumped the cookies of a Selenium Chrome session:
# browser = webdriver.Chrome()
# browser.get('https://www.amazon.co.jp/')
# cookie = browser.get_cookies()
# print(cookie)
# browser.quit()

# Cookies sent with every request issued by fh().
cookie = {
    "cisession": "19dfd70a27ec0eecf1fe3fc2e48b7f91c7c83c60",
    "CNZZDATA100020196": "1815846425-1478580135-https%253A%252F%252Fwww.baidu.com%252F%7C1483922031",
    "Hm_lvt_f805f7762a9a237a0deac37015e9f6d9": "1482722012,1483926313",
    "Hm_lpvt_f805f7762a9a237a0deac37015e9f6d9": "1483926368",
}

# Request headers: a desktop Chrome user-agent string.
kv = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
}

# Captures every http(s) URL stored in a "url" key directly followed by "iconUrl".
pattern = re.compile(r'"url":"(https?:[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|])","iconUrl"')
# Captures the (non-greedy) contents between the brackets of a "list":[...] array.
pattern1 = re.compile(r'"list":\[(.*?)\],')
# Captures a "raw" value directly followed by "normalized", unless it starts with null.
pattern2 = re.compile(r'"raw":"(\b(?!null)\S+)","normalized"')
# Column widths of the two padded groups in every CSV row.
_URL_COLUMNS = 5
_LIST_COLUMNS = 3


def _padded_fields(values, width):
    """Return ``values`` as comma-terminated fields, padded to ``width`` columns.

    Every value is followed by a comma and each missing column becomes an
    empty field.  More than ``width`` values are emitted in full (nothing is
    truncated), which makes that row wider than the others.
    """
    return ''.join(value + ',' for value in values) + ',' * (width - len(values))


def fh(url, out=None, timeout=30):
    """Fetch ``url`` and append one CSV row built from the values found in it.

    After a random pause of up to one second the page is requested with the
    module-level ``cookie`` and ``kv`` headers.  Its text is searched with
    the module-level ``pattern`` (URLs), ``pattern1`` (contents of a
    ``"list"`` array; only the first match is used, split on commas) and
    ``pattern2`` (``"raw"`` values; only the first match is used).

    Row layout::

        "<url>",<5 URL columns>,<3 list columns>,<first raw value>

    Nothing is written when the page has neither a URL match nor a raw
    match.  The page text and the matched values are echoed to stdout.

    Args:
        url: Address to fetch.  ``&qid=<unix time>`` is appended verbatim,
            which assumes the address already carries a query string.
        out: Writable text file receiving the row.  Defaults to the
            module-level ``f`` opened by the calling script.
        timeout: Seconds ``requests`` waits on the server before raising a
            timeout error instead of blocking forever.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    if out is None:
        out = f  # handle the driver loop binds at module level

    time.sleep(random.random())
    r = requests.get(
        url + "&qid=" + str(int(time.time())),
        cookies=cookie,
        headers=kv,
        timeout=timeout,
    )
    r.encoding = r.apparent_encoding
    page = r.text
    print(page)

    urls = pattern.findall(page)
    lists = pattern1.findall(page)
    raws = pattern2.findall(page)
    print(raws)
    if not urls and not raws:
        return

    list_items = lists[0].split(',') if lists else []
    raw = raws[0] if raws else ''

    for value in (*urls, *list_items):
        print(value)
    if raws:
        print(raw)

    # Build the whole row first so that it reaches the file in a single write.
    out.write(
        '"' + url + '",'
        + _padded_fields(urls, _URL_COLUMNS)
        + _padded_fields(list_items, _LIST_COLUMNS)
        + raw + '\n'
    )




# Lines numbered below this are skipped; with 1 every line is processed.
start_line = 1

# Read one URL per line from mk.txt and append its scraped row to zuihuo.csv.
with open("mk.txt", mode="r", encoding="utf-8") as url_file:
    for n, line in enumerate(url_file, start=1):
        line = line.strip()
        # A blank line would make requests fail on a URL without a scheme.
        if n < start_line or not line:
            continue
        # fh() writes through the module-level name ``f``, so the output file
        # has to be bound to exactly that name.  Re-opening it in append mode
        # for every URL closes (and thereby flushes) each row, also when
        # fh() raises.
        with open("zuihuo.csv", mode="a", encoding="utf-8") as f:
            fh(line)
        print(n)
print("写入完毕")
